

***************************************************************
** Table A-17 requires completing the following 7 steps,
** which are meant to aggregate pre- and post-war variables
** at the historical district (landkreis) level.
***************************************************************


******************************************************************
** Step 1. The interwar electoral data in Hänisch 1989 are at four administrative levels:
** (1) District-level average for districts without municipalities with over 2000 inhabitants (KREISE O.GEMEINDEN >2000),
** (2) District-level average for all smaller municipalities within a district that has municipalities with more than 2000 inhabitants (RESTKREISE (GEM.< 2000)),
** (3) Separate entries for each municipality that has over 2000 inhabitants (GEMEINDEN AB 2000 E), and
** (4) Separate entries for each city in Bavaria ("Stadtkreise").
** Cities are excluded from our sample of rural municipalities. In this step we create a dataset that lists the vote share and total population in each of the administrative units 1-3.
******************************************************************

clear all

** Set path (placeholder: fill in the replication folder before running)
	 cd ""


use "2_historical_gemeinde_1949_1969.dta", clear

** Keep relevant variables
keep id_hist_landkreis id_hist_gemeinde name_hist_gemeinde s_agglvl population_1933 cath_*

** Subset sample to bigger municipalities (entries flagged "GEMEINDEN AB 2000 E.")
	keep if s_agglvl == "GEMEINDEN AB 2000 E."
	* Assign a running id (1, 2, ...) to each bigger municipality within its
	* district. Ties are ordered by id_hist_gemeinde so the numbering is the
	* same on every run: without a secondary sort key Stata orders tied
	* observations randomly, so the id (and the level derived from it) would
	* differ between runs.
	bysort id_hist_landkreis (id_hist_gemeinde): gen id_biggertown = _n
	order id_hist_landkreis s_agglvl id_biggertown
	* Auxiliary file; it is appended back onto the main data right below
	save "only_bigger_towns.dta", replace

use "2_historical_gemeinde_1949_1969.dta", clear

** Drop bigger municipalities and replace them by the dataset that carries
** the within-district id (id_biggertown)
	drop if s_agglvl == "GEMEINDEN AB 2000 E."
	append using "only_bigger_towns.dta"

** Unit of aggregation within a given district:
**   level 1    = s_agglvl "KREISE O.GEMEINDEN >"
**   level 2    = s_agglvl "RESTKREISE (GEM.< 20"
**   levels 3-8 = first to sixth bigger municipality of the district
	gen level = . 
	replace level = 1 if s_agglvl == "KREISE O.GEMEINDEN >"
	replace level = 2 if s_agglvl == "RESTKREISE (GEM.< 20"
	forvalues town = 1/6 {
		replace level = `town' + 2 if s_agglvl == "GEMEINDEN AB 2000 E." & id_biggertown == `town'
	}

	* Only six bigger municipalities per district receive a level. Any further
	* one keeps level == . and is left out of the population weights computed
	* below. Report this instead of letting it pass unnoticed (data unchanged).
	quietly count if s_agglvl == "GEMEINDEN AB 2000 E." & missing(level)
	if r(N) > 0 {
		display as error "Warning: " r(N) " bigger municipalities have no level (more than 6 in a district) and are ignored in the weights"
	}


* Population of each level within a given district
* (total_pop_k is filled only on the rows that belong to level k)
forvalues k = 1/8 {
	bysort id_hist_landkreis: egen total_pop_`k' = total(population_1933) if level == `k'
}

* Spread each level's population to every row of the district;
* a level that does not occur in the district counts as 0
forvalues k = 1/8 {
	bysort id_hist_landkreis: egen max_total_pop_`k' = max(total_pop_`k')
	replace max_total_pop_`k' = 0 if max_total_pop_`k' ==. 
}

* Total district population = sum over the eight levels
local pop_sum "max_total_pop_1"
forvalues k = 2/8 {
	local pop_sum "`pop_sum' + max_total_pop_`k'"
}
gen landkreis_tot_pop = `pop_sum'

* Population weight of each level = level population / district population
local weight_vars ""
forvalues k = 1/8 {
	gen weight_`k' = max_total_pop_`k'/landkreis_tot_pop
	local weight_vars "`weight_vars' weight_`k'"
}

summarize `weight_vars'

* Data cleaning: identifiers and weights first, drop the population helpers
order id_hist_gemeinde name_hist_gemeinde id_hist_landkreis level weigh*
drop total_pop* population_1933

** Collapse data: keep a single row for each district-level combination
	bysort id_hist_landkreis level: keep if _n == 1
	order id_hist_landkreis level


****************************************************************
** Step 2
** Compute population-weighted Catholic vote by election-year
****************************************************************


*** Population weighted Catholic vote at historical district, one pass per
*** interwar election: 1920, May 1924, December 1924, 1928, 1930,
*** July 1932, November 1932 and 1933.
***
*** For every row, the vote share of its level is multiplied by that level's
*** population weight; summing these products within the district gives the
*** weighted district vote share (w_end_cath_<election>).

foreach election in 20 24_may 24_dec 28 30 32_july 32_nov 33 {

	* Weighted contribution of the row; stays missing if the row has no level
	gen w_combined = .
	forvalues k = 1/8 {
		replace w_combined = cath_`election'*weight_`k' if level == `k'
	}

	* District total of the weighted contributions
	bysort id_hist_landkreis: egen w_end_cath_`election' = total(w_combined)

	** Clean up
	drop w_combined
}


* Sum statistics for weighted variables
summarize w_end_cath_20 w_end_cath_24_may w_end_cath_24_dec w_end_cath_28 w_end_cath_30 w_end_cath_32_july w_end_cath_32_nov w_end_cath_33


** Note to coder: ahead of the "reshape" command, the two years with two
** elections each are relabelled so that every election has its own
** two-digit suffix:
**   (1) May 1924      -> 24   and   December 1924 -> 25
**   (2) July 1932     -> 31   and   November 1932 -> 32
** This is purely notational and has no substantive impact on the analyses.

local old_suffix "24_may 24_dec 32_july 32_nov"
local new_suffix "24 25 31 32"
forvalues i = 1/4 {
	local from : word `i' of `old_suffix'
	local to   : word `i' of `new_suffix'
	rename w_end_cath_`from' w_end_cath_`to'
}

** For each historical district, keep a single observation
** (the weighted vote shares are constant within district).
bysort id_hist_landkreis: keep if _n == 1

keep id_hist_landkreis w_end_cath_20 w_end_cath_24 w_end_cath_25 w_end_cath_28 w_end_cath_30 w_end_cath_31 w_end_cath_32  w_end_cath_33

save "did_prewar.dta", replace



****************************************************************
** Step 3
** Aggregate post-war variables at the historical district, 
** beginning with Catholic vote (1949-1969), then repression, 
** and lastly, the controls.
****************************************************************

use "2_historical_gemeinde_1949_1969.dta", clear

* Select relevant variables
keep id_hist_gemeinde id_hist_landkreis csu_* total_votes_* treated_town_cont population_1933 eligible_voters*

order id_hist_landkreis id_hist_gemeinde

sort id_hist_landkreis

** CSU vote share at the historical district, for each post-war election.
** All variables are referenced by their full names. The original code used
** the abbreviations "tvotescsu" (1949) and "total_votes_1953" (1953), which
** only resolve while variable abbreviation is enabled and unambiguous, and
** which made 1953 inconsistent with its own denominator.
foreach yr in 1949 1953 1957 1961 1965 1969 {

	* CSU nominal votes at municipality level
	* (share * total votes / 100; the share is presumably in percent - confirm)
	gen tvotescsu_`yr' = csu_`yr'_histgem*total_votes_`yr'_histgem/100
	* Total CSU votes at historical district
	bysort id_hist_landkreis: egen county_csuvotes_`yr' = total(tvotescsu_`yr')
	* Total votes at historical district
	bysort id_hist_landkreis: egen county_tvotes_`yr' = total(total_votes_`yr'_histgem)
	* Share of CSU votes at historical district (times 100)
	bysort id_hist_landkreis: gen county_csushare_`yr' = 100*county_csuvotes_`yr'/county_tvotes_`yr'
}

* Summary Statistics	
	sum county_csushare_19*

** Data cleaning
	drop csu_* total_votes_*  county_csuvotes_* county_tvotes_*


** Repression variable: district total of treated_town_cont
	tabulate treated_town_cont
	by id_hist_landkreis, sort: egen did_repression = total(treated_town_cont)
	drop treated_town_cont

** Population as of 1933, summed over the district
	by id_hist_landkreis, sort: egen did_population = total(population_1933)
	drop population_1933

** District repression total per 1000 inhabitants (1933 population)
	gen did_pc_repression = 1000*did_repression/did_population

	
** Post-war eligible voters aggregated at district level
	foreach yr in 1949 1953 1957 1961 1965 1969 {
		by id_hist_landkreis, sort: egen did_eligible_voters_`yr' = total(eligible_voters`yr'_histgem)
	}

** Data cleaning: the municipality-level counts are no longer needed
	foreach yr in 1949 1953 1957 1961 1965 1969 {
		drop eligible_voters`yr'_histgem
	}

** Keep relevant variables
	keep id_hist_landkreis county_csushare_* did_pc_repression did_eligible_voters_*

** Keep one observation per historical district 
	by id_hist_landkreis, sort: keep if _n == 1

save "did_post_war.dta", replace



***************************************************
** Step 4 
** Change in Eligible Voters Before the War
***************************************************

** To compute the number of eligible voters before the war, 
** we need to expand the pre-war data (Hanisch 1989), keep 
** one unit per level, and then collapse it at the historical
** district. (See step 1).

** Subset sample to larger municipalities 

	use "2_historical_gemeinde_1949_1969.dta", clear

	keep id_hist_landkreis id_hist_gemeinde name_hist_gemeinde s_agglvl eligible_voters_*

	keep if s_agglvl == "GEMEINDEN AB 2000 E."
	* Assign a running id (1, 2, ...) to each larger municipality within its
	* district. Ties are ordered by id_hist_gemeinde so the numbering is the
	* same on every run (Stata orders tied observations randomly otherwise).
	bysort id_hist_landkreis (id_hist_gemeinde): gen id_biggertown = _n
	* Auxiliary file (overwrites the one written in Step 1)
	save "only_bigger_towns.dta", replace

** Drop bigger municipalities from main, and replace by dataset with unique identifiers	
	use "2_historical_gemeinde_1949_1969.dta", clear
	drop if s_agglvl == "GEMEINDEN AB 2000 E."
	append using "only_bigger_towns.dta"

** Unit of aggregation within a district: 1 and 2 are the two kinds of
** district-level entries, 3-8 are the first to sixth larger municipality
	gen level = . 
	replace level = 1 if s_agglvl == "KREISE O.GEMEINDEN >"
	replace level = 2 if s_agglvl == "RESTKREISE (GEM.< 20"
	forvalues town = 1/6 {
		replace level = `town' + 2 if s_agglvl == "GEMEINDEN AB 2000 E." & id_biggertown == `town'
	}

	* Only six larger municipalities per district receive a level. Further
	* ones share level == ., so the step below keeps just one of them and the
	* eligible voters of the others are lost. Report this instead of letting
	* it pass unnoticed (the data are not changed).
	quietly count if s_agglvl == "GEMEINDEN AB 2000 E." & missing(level)
	if r(N) > 0 {
		display as error "Warning: " r(N) " larger municipalities have no level (more than 6 in a district); eligible voters may be undercounted"
	}

** Keep one observation per district-level
	bysort id_hist_landkreis level: keep if _n == 1

** Compute the total number of eligible voters by district. 
** Variable names change in anticipation of the "reshape" command: the two
** 1924 elections become 1924/1925 and the two 1932 elections become
** 1931/1932 (see the note to coder before "did_prewar.dta" is saved in Step 2).
	local source_suffix "1920 1924_may 1924_dec 1928 1930 1932_jul 1932_nov 1933"
	local target_year   "1920 1924 1925 1928 1930 1931 1932 1933"
	forvalues i = 1/8 {
		local src : word `i' of `source_suffix'
		local yr  : word `i' of `target_year'
		bysort id_hist_landkreis: egen did_eligible_voters_`yr' = total(eligible_voters_`src')
	}

** Keep relevant variables.
** Only the district id and the district totals are kept. The data in memory
** still hold every municipality-level variable of the source file; dropping
** a hand-picked subset (and relying on the abbreviation "id_bigger") let
** all the others leak into the district-level file and the later merge.
	keep id_hist_landkreis did_eligible_voters_*

** Keep one observation per district 
	bysort id_hist_landkreis: keep if _n == 1

save "eligible_voters_landkreis_prewar.dta", replace


*******************************************************
** Step 5 
** Merge Datasets created in Steps 1 to 4, all of
** them already indexed at historical district level 
*******************************************************

use "did_post_war.dta", clear

** Merge with pre-war eligible voters (no _merge variable is kept)
merge 1:1 id_hist_landkreis using "eligible_voters_landkreis_prewar.dta", nogenerate

* Merge with pre-war weighted vote shares
merge 1:1 id_hist_landkreis using "did_prewar.dta"

** Change names in anticipation of reshape: the pre-war weighted Catholic
** vote shares get the same stub as the post-war CSU shares, so that both
** end up in a single long variable
foreach yy in 20 24 25 28 30 31 32 33 {
	rename w_end_cath_`yy' county_csushare_19`yy'
}

sort id_hist_landkreis

*******************************************************
** Step 6: 
** Reshape the Database
** The resulting dataset is indexed at the 
** district-year level
*******************************************************

* Wide -> long: one row per district and election year
reshape long county_csushare_ did_eligible_voters_, i(id_hist_landkreis) j(year)

** Panel structure: elections are numbered consecutively (1, 2, ...) so that
** lags and differences run from one election to the next
	egen election_year = group(year)
	xtset id_hist_landkreis election_year
	label variable election_year "Election Year"
	
* Treatment Condition (above/below mean)
*   1 = repression at or above the mean, 0 = below the mean,
*   missing where repression is missing; always 0 for elections before 1940
	egen mean_did_pc_repression = mean(did_pc_repression)
	gen high_pc_repression_histlandk = (did_pc_repression >= mean_did_pc_repression) if did_pc_repression != .
	replace high_pc_repression_histlandk = 0 if year<1940

* Continuous Measure of Repression (complete variable): 0 before 1940
	replace did_pc_repression = 0 if year<1940

* Rename for clarification purposes
	rename did_pc_repression cont_pc_repression_histlandk
	rename county_csushare_ catholic_vote_share_histlandk
	
* Growth of Eligible Voters relative to the previous election
	gen diff_did_eligible_voters_ = D.did_eligible_voters_
	gen lag_did_eligible_voters_ = L.did_eligible_voters_
	gen growth_voters_histlandk = diff_did_eligible_voters_/lag_did_eligible_voters_

* Erase auxiliary datasets
	foreach aux in only_bigger_towns did_post_war did_prewar eligible_voters_landkreis_prewar {
		erase "`aux'.dta"
	}

* Clean variables
keep catholic_vote_share_histlandk cont_pc_repression_histlandk high_pc_repression_histlandk growth_voters_histlandk id_hist_landkreis election_year 

* Label variables
label variable catholic_vote_share_histlandk "Catholic vote (Pop-weighted hist landkreis)"
label variable cont_pc_repression_histlandk "Repressed priests per cap (continuous, hist landkreis)"
label variable high_pc_repression_histlandk "High per capita priest repression (indicator, hist landkreis)"
label variable growth_voters_histlandk "Growth of eligible voters (hist landkreis)"

save "5_did_landkreis_year_data.dta", replace








